# This script does the main in-sample counterfactual
# calculations from Section 6.2 of the paper

# See "do_counterfactuals_future.py" for the out of sample calculation
# See "do_counterfactuals_boot.py" for the bootstrap calculation
# Use "plot_counterfactuals.py" to make Figures 7 and 8

# Jacob Moscona and Karthik Sastry
# This version: last edited on September 28, 2022


##
## Import libraries
##

import numpy as np
import pandas as pd
import os

wd = '/home/karthik/Dropbox (MIT)/climate_crops/QJE_Submission/Replication/Data/'
os.chdir(wd)

# File with the shocks
historical_file = 'county_shocks.csv'
# File with two extra pieces: total area in 2017 and price index
extra_file = 'county_extra_for_counterfactual.csv'

    
# If true, do calculation holding fixed prices
priceFlag = False
# If true, use coefficients estimated with weights
useWt = False

##
## Set options
##

# NOTE
# These need to be manually copied and supplied

# Panel data specification
fe_file = 'gdd_panel_1950_2010_fe.xls'
beta = -0.000232
gamma = -0.000823
phi = 9.12e-08
k = 7.828


if priceFlag: # Use the coefficients from the specification that holds price fixed
    ### GDD, prices panel, 1950 to 2010
    fe_file = 'gdd_panel_1950_2010_prices_fe.xls' # Fix!!
    beta = -0.000205
    gamma = -0.000794
    phi = 8.71e-08
    k = 7.334
    
    priceC = 0.339
    priceInt = -1.1e-5

if useWt:
    ## GDD, panel, 1950 to 2010, weighted
    fe_file = 'gdd_panel_1950_2010_weighted_fe.xls'
    beta = -0.000390
    gamma = -0.00110
    phi = 1.28e-07
    k = 8.542



t0 = 1960 # Initial decade
t1 = 2010 # Final decade

metric = 'gdd' # Name of the metric being used

##
## Load data
##

# Historical values of own, leave one out, etc.
dta = pd.read_csv(historical_file)

# Location and state x time FE
dta_fe = pd.read_excel('../Data/' + fe_file)
dta_fe.columns = ['year','CNTY_FIPS','STATE_FIPS','area','area_init','ife','tfe']

# Merge IFE
dta = dta.merge(dta_fe.loc[dta_fe.year == t0,['CNTY_FIPS','STATE_FIPS','ife']], on =
                ['CNTY_FIPS','STATE_FIPS'])
# Merge TFE0
dta = dta.merge(dta_fe.loc[dta_fe.year == t0,['CNTY_FIPS','STATE_FIPS','tfe']], on =
                ['CNTY_FIPS','STATE_FIPS'])
dta = dta.rename(columns = {'tfe':'tfe0'})
# Merge TFE1
dta = dta.merge(dta_fe.loc[dta_fe.year == t1,['CNTY_FIPS','STATE_FIPS','tfe']], on =
                ['CNTY_FIPS','STATE_FIPS'])    
dta = dta.rename(columns = {'tfe':'tfe1'})
# Merge initial area
dta = dta.merge(dta_fe.loc[dta_fe.year == t0,['CNTY_FIPS','STATE_FIPS','area_init']], on =
                ['CNTY_FIPS','STATE_FIPS'])    


# Merge extra columns
dta_extra = pd.read_csv(extra_file)
dta = dta.merge(dta_extra, on =['CNTY_FIPS','STATE_FIPS'])

rho_i = dta['ife'].values
alpha = [[],[]]
alpha[0] = dta['tfe0'].values+k
alpha[1] = dta['tfe1'].values+k
area = dta.area_init.values


if priceFlag:
    # Add data on prices    
    price0 = dta['logprice_own_' + str(t0)].values
    price1 = dta['logprice_own_' + str(t1)].values
    
##
## Main function
##
    
def fitted_values(own,loo,interact,alpha,rho):
    return rho + alpha + beta * own + gamma * loo + phi * (interact)
    
def calculate_land_values(own0,loo0,own1,loo1,rho_i,offset=0):
    
    # Calculates the fitted land value in each county
    # Plus a counterfactual that holds constant loo for the calculation of 
    # interact in the future period
    
    fit0 = fitted_values(own0,loo0,own0*loo0,alpha[0],rho_i)
    fit1 = fitted_values(own1,loo1,own1*loo1,alpha[1],rho_i)
    
    int_noi = own1 * (loo0 + offset) # counterfactual eco interact
    
    fit_noi = fitted_values(own1,loo1,int_noi,alpha[1],rho_i) # no innovation
    
    fit_nocc = fitted_values(own0,loo0,own0*loo0,alpha[1],rho_i)
    
        
    if priceFlag: # Add price corrections
        fit0 += priceC * price0 + priceInt * (price0 * own0)
        fit1 += priceC * price1 + priceInt * (price1 * own1)
        fit_noi += priceC * price1 + priceInt * (price1 * own1)
        fit_nocc += priceC * price1 + priceInt * (price1 * own0)
        
        
    return(fit0,fit1,fit_noi,fit_nocc)
    
##
## Calculation
##
  
own0 = dta[metric + '_own_' + str(t0)]
loo0 = dta[metric + '_loo_' + str(t0)]
own1 = dta[metric + '_own_' + str(t1)]
loo1 = dta[metric + '_loo_' + str(t1)]

# Treating the zero as zero; offset = 0
fit0,fit1,fit_noi,fit_nocc = calculate_land_values(
        own0,loo0,own1,loo1,
            rho_i,
            offset = 0)

# Aggregate
def logmean(x):
    return np.log(np.nanmean(np.exp(x))) 
awt = area
loss_with_innovation = np.nansum(awt* np.exp(fit_nocc) - awt* np.exp(fit1))
loss_without_innovation = np.nansum(awt* np.exp(fit_nocc) - awt* np.exp(fit_noi))

innovation_saved = loss_without_innovation - loss_with_innovation
big_kahuna = 100 * innovation_saved / loss_without_innovation


median_help = 100 * np.nanmedian(fit1-fit_noi) # Median percentage that is helped
mean_help = 100 * np.nanmean(fit1-fit_noi) # Median percentage that is helped
percent_damage = 100 * loss_with_innovation / np.nansum(awt* np.exp(fit_nocc))
percent_damage_noi = 100 * loss_without_innovation / np.nansum(awt* np.exp(fit_nocc))

print('The damage is ' + str(round(percent_damage,2)) + ' percent\n')
print('The damage without innovation is ' + str(round(percent_damage_noi,2)) + ' percent\n')
print('The percent saved is ' + str(round(big_kahuna,2)) + ' percent\n')
print('Dollar savings are '+ str(round(innovation_saved/1e9,2)) + ' billion\n')

total_val = np.nansum(awt* np.exp(fit1))
noi_val = np.nansum(awt* np.exp(fit_noi))
nocc_val = np.nansum(awt* np.exp(fit_nocc))

print('The dollar value is '+ str(round(total_val/1e12,4)) + ' trillion\n')
print('The dollar value, no I, is '+ str(round(noi_val/1e12,4)) + ' trillion\n')
print('The dollar value, no CC, is '+ str(round(nocc_val/1e12,4)) + ' trillion\n')

